#Dennis Moskov, Master Thesis
#clustering promoter, base metals, support and preparation methods into catalyst groups
#clustering by Partitioning Around Medoids (PAM)
#use "fpc" and "cluster" packages
     
#install.packages("fpc")
#install.packages("cluster")
#library(fpc)
#library(cluster)


# Choose the variables used for clustering: columns 2 to 17 of DB.
DBc <- DB[2:17]

# Build the pairwise dissimilarity object. Gower's metric is used because
# the selected columns are of mixed type.
# Functions are namespace-qualified so this section also runs when the
# packages have not been attached with library().
dissM <- cluster::daisy(DBc, metric = "gower")

# Largest number of clusters to try.
k <- 15

# Search for the best number of clusters in 2..k, once per algorithm:
#   paF - usepam = FALSE
#   paT - usepam = TRUE
# Each result is read further down as: [[1]] the fitted partition object,
# [[2]] the selected number of clusters.
# NOTE(review): the fpc documentation for pamk says dissimilarity matrices
# cannot be used with clara (usepam = FALSE) - confirm that the paF run is
# meaningful for a "gower" dissimilarity object.
# NOTE(review): criterion = "multiasw" presumably works on random subsets;
# consider passing seed = to pamk for reproducible results - TODO confirm.
paF <- fpc::pamk(dissM, krange = 2:k, criterion = "multiasw",
                 usepam = FALSE, scaling = FALSE)
paT <- fpc::pamk(dissM, krange = 2:k, criterion = "multiasw",
                 usepam = TRUE, scaling = FALSE)


#paF[[1]][3]     			#medoids
#paF[[2]]       			#number of clusters
#paF[[1]][6]     			#cluster summary
#paF[[1]][9]$silinfo$avg.width          #Silhouette width
#paF[[1]][4]    			#Cluster

#paT[[1]][1]     			#medoids
#paT[[2]]         			#number of clusters
#paT[[1]][6]     			#cluster summary
#paT[[1]][7]$silinfo$avg.width   	#Silhouette width
#paT[[1]][3]     			#Cluster


# Pick the clustering with the higher average silhouette width.
#
# Both candidates are scored on dissM, the same dissimilarities the
# silhouette plot is drawn from, so the two widths are comparable. The
# silinfo stored in a clara result (paF) only describes clara's best
# sample, so it is not compared against pam's full-data value here.
# Components are accessed by name because pam and clara objects order
# their components differently.
asw_clara <- mean(
  cluster::silhouette(paF$pamobject$clustering, dissM)[, "sil_width"]
)
asw_pam <- mean(
  cluster::silhouette(paT$pamobject$clustering, dissM)[, "sil_width"]
)

# nc: number of clusters of the selected result.
# cl: cluster assignments, deliberately kept as a one-element list named
#     "clustering" (single-bracket subsetting): silhouette() accepts such
#     a list, and cbind() later turns it into the DBc$clustering column.
if (asw_clara > asw_pam) {
  nc <- paF$nc
  cl <- paF$pamobject["clustering"]
} else {
  nc <- paT$nc
  cl <- paT$pamobject["clustering"]
}


# Silhouette plot of the selected clustering, based on the dissimilarities.
plot(x = silhouette(cl, dissM), main = "Silhouette Plot of Clusters")

# Export the cluster assignments as CSV into the working directory.
write.csv(x = cl, file = "clusters.csv")

# Rebuild DBc for further use: the first column of DB, the cluster
# assignments, and every column of DB after the 17th.
DBc <- cbind(DB[1], cl, DB[-seq_len(17)])

# Treat the cluster label as a categorical variable.
DBc[["clustering"]] <- as.factor(DBc[["clustering"]])








